"""Aggregate monthly sales with PySpark: sum the sales amount per key
from a comma-separated text file and print the totals."""
import os
import sys

from pyspark import SparkConf, SparkContext

# Make Spark workers use the same Python interpreter as the driver,
# avoiding version-mismatch errors between driver and executors.
os.environ["PYSPARK_PYTHON"] = sys.executable

# Run locally on all cores; pin PYTHONHASHSEED on the executors so that
# hash-based operations behave reproducibly across worker processes.
conf = (
    SparkConf()
    .setMaster("local[*]")
    .setAppName("spark_text_app")
    .set("spark.python.worker.timeout", "6000")
    .set("spark.executorEnv.PYTHONHASHSEED", "0")
)
sc = SparkContext(conf=conf)
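# Load the raw text file ("January 2011 sales data"); each line is assumed
# to be comma-separated, with the grouping key in column 0 and a numeric
# sales amount in column 2.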
rdd = sc.textFile("2011年1月销售数据.txt")
# Split each line on commas, keep (key, amount) pairs, and sum per key.
rdd = (
    rdd.map(lambda line: line.split(","))
    .map(lambda fields: (fields[0], int(fields[2])))
    .reduceByKey(lambda a, b: a + b)
)
# Collect the per-key totals to the driver and print them.
rdd_list = rdd.collect()
rdd_dict = dict(rdd_list)
print(rdd_dict.values())
sc.stop()
